# ------------------------------------------------------------------------------
# Counts, for each zocat count feature, the rows with a nonzero value and saves
# the counts (sorted most common first) as both RDS and CSV
# Author: Cassidy Shubatt <cshubatt@gmail.com>
# To run: bash 06_get_common_zocats.sh
# ------------------------------------------------------------------------------

# Libraries --------------------------------------------------------------------
library(here)
library(yaml)
library(data.table)
library(tidyverse)
library(feather) # read_feather
library(glue)

# Project utilities and the folder this script writes its outputs to.
util_path <- here::here("lib", "util.R")
u <- modules::use(util_path)
temp <- here::here("code", "02_prep_and_summarize_cohort", "temp")

# Load Data --------------------------------------------------------------------
message("Loading data...")
# NOTE(review): `split` is never referenced by name below. glue() interpolates
# from the calling environment, so presumably the path template in
# lib/filepaths.yml contains a `{split}` placeholder -- confirm before removing.
split <- "random"
paths <- yaml::read_yaml(here::here("lib", "filepaths.yml"))
test_features_path <- glue::glue(paths$features$test)
x <- readRDS(test_features_path)

# Select ZCs -------------------------------------------------------------------
message("Selecting zocat features...")
# Keep the columns whose names contain all three markers (anywhere in the name).
feat_names <- colnames(x)
is_zc_count <- grepl("zc_cat_name", feat_names, fixed = TRUE) &
  grepl("t2yall", feat_names, fixed = TRUE) &
  grepl("count", feat_names, fixed = TRUE)
zc_feats <- feat_names[is_zc_count]

# Stop with an explicit message when nothing matches, instead of carrying an
# empty selection into the counting and saving steps.
if (length(zc_feats) == 0) {
  stop("No zocat count features found among the feature columns.",
       call. = FALSE)
}

# Logical indicator matrix: TRUE where the feature value is positive.
# drop = FALSE keeps the selection two-dimensional (and keeps its column name)
# when only a single feature matches. Comparing the matrix directly gives the
# same logical matrix as going through a data frame first.
zc_x <- as.matrix(x[, zc_feats, drop = FALSE]) > 0

# Summing ZCs ------------------------------------------------------------------
message("Summing ZC counts...")
# Column sums of the logical indicators give, per feature, the number of rows
# with a positive value. colSums() is called without na.rm, so an NA anywhere
# in a column makes that column's count NA.
counts <- colSums(zc_x)

# One row per feature, largest count first, with the shared feature-name prefix
# stripped so only the zocat label remains.
counts_df <- tibble(zocat = names(counts), count = counts) %>%
  arrange(desc(count)) %>%
  mutate(zocat = str_remove(zocat, "dia_t2yall_zc_cat_name_count_"))

# Save -------------------------------------------------------------------------
message("Saving...")
# Create the output folder if it does not exist yet, so the writes below do not
# fail at the very end of the script merely because the folder is missing.
# showWarnings = FALSE keeps this silent when the folder is already there.
dir.create(temp, recursive = TRUE, showWarnings = FALSE)

# The same table is written twice: RDS for downstream R code, CSV for viewing.
write_rds(counts_df, file.path(temp, "zc_counts.rds"))
write_csv(counts_df, file.path(temp, "zc_counts.csv"))

message("Done.")
